import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq


# Maps each supported evaluation split name to the CSV that lists its images.
_EVAL_CSV_PATHS = {
    'eval_images_q': 'data/split_results/goods_images_eval_q.csv',
    'eval_images_v': 'data/split_results/goods_images_eval_v.csv',
    'eval_images_o': 'data/split_results/goods_images_eval_o.csv',
}


def proj_data(img_file: str, all_image_paths, final_embeddings) -> dict:
    """Join the image metadata of one evaluation split with its embeddings.

    The split's CSV is read and, for every row, the row's ``image_id`` is
    looked up in ``all_image_paths``; the embedding stored at the same
    position in ``final_embeddings`` is attached to the row's metadata.

    Args:
        img_file: Split name; one of ``'eval_images_q'``,
            ``'eval_images_v'`` or ``'eval_images_o'``.
        all_image_paths: Iterable of image identifiers, positionally aligned
            with ``final_embeddings``. Entries are compared against the
            stringified ``image_id`` column and must be hashable
            (presumably plain strings -- confirm against the caller).
        final_embeddings: Indexable collection of embeddings;
            ``final_embeddings[i]`` belongs to the i-th entry of
            ``all_image_paths``.

    Returns:
        A dict keyed by ``image_id``. Each value is a dict with the keys
        ``'image_id'``, ``'image_url'``, ``'goods_id'`` (all ``str``) and
        ``'embedding'``. Rows whose ``image_id`` has no embedding are
        skipped with a printed warning. If an ``image_id`` occurs in several
        CSV rows, the last row wins.

    Raises:
        ValueError: If ``img_file`` is not a known split name.
    """
    try:
        csv_path = _EVAL_CSV_PATHS[img_file]
    except KeyError:
        raise ValueError(
            f"unknown img_file {img_file!r}; "
            f"expected one of {sorted(_EVAL_CSV_PATHS)}"
        ) from None

    path_df = pd.read_csv(csv_path)

    # Build the id -> position index once instead of a linear scan per row.
    # setdefault keeps the FIRST position of a duplicated id, which is what
    # list.index() would return.
    index_by_image_id = {}
    for position, path in enumerate(all_image_paths):
        index_by_image_id.setdefault(path, position)

    ans = {}
    # Read the columns directly rather than via iterrows(): iterrows() builds
    # a Series per row and may upcast values (e.g. int ids to float), which
    # would change their string form.
    rows = zip(path_df['image_id'], path_df['image_url'], path_df['goods_id'])
    for raw_image_id, raw_image_url, raw_goods_id in rows:
        image_id = str(raw_image_id)
        image_url = str(raw_image_url)
        goods_id = str(raw_goods_id)

        id_index = index_by_image_id.get(image_id)
        if id_index is None:
            # The lookup key is image_id, so report that (goods_id is only
            # included as context for locating the row).
            print(
                f"警告: 在提取的特征中未找到 image_id '{image_id}' "
                f"(goods_id '{goods_id}')。将跳过此条目。"
            )
            continue

        ans[image_id] = {
            'image_id': image_id,
            'image_url': image_url,
            'goods_id': goods_id,
            'embedding': final_embeddings[id_index],
        }
    return ans




